* Born and Breitung (2016) QP/LM-test for serial correlation
*! Version 1.1.1  06apr2018
* Contact jesse.wursten@kuleuven.be for bug reports/inquiries.

* Changelog
** 06apr2018: Reference to Stata Journal article added.
** 21nov2016: Crucial bug fix! Test was calculating the mean incorrectly if data were unbalanced (due to row-wise deletion in mata).
** 9nov2016: Crucial bug fix! Test would use completely wrong values if data were unbalanced.
** 16sep2016: Thanks to Sebastian Kripfganz for spotting an error.

/*
cap mata mata drop qp_statistic()
cap mata mata drop qp_statistic_unbalanced()

cap mata mata drop lm_statistic()
cap mata mata drop lm_statistic_unbalanced()
*/

program define xtqptest, rclass
	version 12
	preserve
	
	** Technicalities
	syntax [varlist(default=none)] [if] [in], [Lags(numlist integer max=1) ORder(numlist integer max=1) force]
	local p = "`lags'"
	local k = "`order'"
	
	*** Identify test procedure to use
	if "`p'" != "" & "`k'" != "" {
		noisily di as error "Currently you cannot specify both a lags and an order option, please execute command separately."
		exit
	}
	if "`p'" == "" & "`k'" == "" local p = 2
	
	if "`p'" != "" {
		local testFull = "Q(`p')"
		local test = "Q(p)"
	}
	else if "`k'" != "" {
		local testFull = "LM(`k')"
		local test = "LM(k)"
	}
	
	*** Postestimation?
	tempvar residuals
	if "`varlist'" == "" {
		predict `residuals', ue
		local varlist = "`residuals'"
		local postEstimation = "1"
	}
	
	** Fill out sample for mata (we reshape in Mata assuming a rectangular sample)
	tsfill, full
	
	*** Mark out if/in restrictions
	marksample toUse, novarlist

	
	*** Obtain time and panel variables
	qui xtset
	local panelvar = r(panelvar)
	local timevar = r(timevar)
			
	** Print results
	if "`postEstimation'" == "" di as result _newline "Bias-corrected Born and Breitung (2016) `test'-test on variables `varlist'"
	else						di as result _newline "Bias-corrected Born and Breitung (2016) `test'-test as postestimation"
	di as text "Panelvar: `panelvar'"
	di as text "Timevar: `timevar'"
	if "`p'" != ""				di as text "p (lags): `p'"
	if "`k'" != ""				di as text "k (order): `k'"
	di as text "{hline 30}{c TT}{hline 23}{c TT}{hline 16}{c TT}{hline 14}{c TRC}"
	di as text _col(2) %~28s = "Variable"  _col(30) " {c |}" _skip(1) "`test'-stat" _col(45) "p-value" _skip(2) " {c |}" _skip(6) "N" _skip(4) "maxT" " {c |}" %~14s = "balance?" "{c |}" 
	di as text "{hline 30}{c +}{hline 23}{c +}{hline 16}{c +}{hline 14}{c RT}"

	** Calculate statistic
	tempname stat pvalue obsCount
	tempvar t id toUse2
	local j = 1
	foreach var of local varlist {
		*** Clear locals
		local balance = ""
		local errorMessage = ""
		local tooShort = ""
		local minT = ""
		
		*** Obtain number of time and panel units
		qui egen `t' = group(`timevar') if `toUse' == 1 & ~missing(`var')
		sum `t', meanonly
		local timelength = r(max)
		
		qui egen `id' = group(`panelvar') if `toUse' == 1 & ~missing(`var')
		sum `id', meanonly
		
		local panelunits = r(max)
		
		***** Tag empty panels
		qui bysort `panelvar': egen `obsCount' = count(`var')
				
		*** Check balancedness
		local balance = ""
		
		**** Unbalanced
		qui count if ~missing(`var') & `toUse' == 1
		local totalObsUsed = r(N)
		local requiredObs = `timelength'*`panelunits'
		
		if `totalObsUsed' != `requiredObs' 	local balance = "unbalanced"
		
		**** With gaps
		tempvar gap
		qui bysort `panelvar' (`timevar'): gen `gap' = 1 if missing(`var') & ~missing(L.`var', F.`var') & `toUse' == 1
		qui count if `gap' == 1
		if r(N) != 0 local balance = "gaps (error)"
		
		**** Balanced
		if "`balance'" == "" local balance = "balanced"
		if "`force'" != "" & "`balance'" == "" local balance = "unbalanced"
		
		*** Unless force is specified ...
		if "`force'" == "" & "`p'" != "" {	
			* Test if residuals include the fixed effect
			tempvar mean_resid
			qui bysort `panelvar' (`timevar'): egen `mean_resid' = mean(`var') if `toUse' == 1
			qui sum `mean_resid' if `toUse' == 1
			drop `mean_resid'
			if r(sd) < 0.001 | abs(r(max)) < 0.001 | abs(r(min)) < 0.001 {
				*noisily di as error _col(2) %~28s = abbrev("`var'",28) 
				noisily di as error "`var': " _continue
				noisily di as error _col(4) "Residuals do not appear to include the fixed effect."
				noisily di as error _col(4) "This test is made to function with ue = c_i + e_it."
				noisily di as error _col(4) "If you are sure that your residuals do indeed include" _newline _col(4) "the fixed effect (programming bugs happen),"
				noisily di as error _col(4) "specify 'force' to skip this test."
				continue
			}
			
			* Test if T is sufficiently long
			tempvar nonMissing
			qui bysort `panelvar' (`timevar'): egen `nonMissing' = count(`var') if `obsCount' > 0
			sum `nonMissing', meanonly
			local minT = r(min)
			if `minT' <= `p' local tooShort = "tooShort"
			
			drop `nonMissing'
		}
		
		*** Update toUse variable 
		**** Exclude missings for the balanced case
		qui gen `toUse2' = `toUse'
		qui replace `toUse2' = 0 if missing(`var')
		
		**** Make sure unbalanced case performs properly
		qui replace `toUse' = 0 if `obsCount' == 0
		drop `obsCount'
		

		** Calculate statistics
		sort `panelvar' `timevar'

		*** Q(p) test ("upto order")
		if "`test'" == "Q(p)" & "`tooShort'" == "" {
			if "`balance'" == "balanced" mata: qp_statistic("`var'", "`toUse2'", `timelength', `panelunits', `p')
			else if "`balance'" == "unbalanced" mata: qp_statistic_unbalanced("`var'", "`toUse'", `panelunits', `p')
			
			scalar `stat' = round(QP, 0.001)
			scalar `pvalue' = round(pvalue, 0.001)
			
			else if "`balance'" == "gaps (error)" {
				scalar QP = .
				scalar `stat' = .
				scalar pvalue = .
				scalar `pvalue' = .
			}
		}
		
		if "`test'" == "Q(p)" & "`tooShort'" != "" {
			scalar QP = .
			scalar `stat' = .
			scalar pvalue = .
			scalar `pvalue' = .
			local errorMessage "At least one panel is too short, i.e. minT(`minT') <= lags(`p')"
		}
		
		*** LM(k) test ("of order")
		if "`test'" == "LM(k)" {
			if "`balance'" == "balanced" mata: lm_statistic("`var'", "`toUse2'", `panelunits', `k')
			
			else if "`balance'" == "unbalanced" mata: lm_statistic("`var'", "`toUse'", `panelunits', `k')
			
			scalar `stat' = round(LM, 0.001)
			scalar `pvalue' = round(pvalue, 0.001)
			
			else if "`balance'" == "gaps (error)" {
				scalar LM = .
				scalar `stat' = .
				scalar pvalue = .
				scalar `pvalue' = .
			}
		}
		
		** Display results
		if "`postEstimation'" == "1" local var = "Post Estimation"
		if "`errorMessage'" == "" di _col(2) %~28s = abbrev("`var'",28) _col(30)  " {c +}" _skip(3) %4.2f = `stat' _col(46) %4.3f = `pvalue' _col(55) "{c +}" %7.0f = `panelunits' %8.0f = `timelength' " {c +}" %~14s = "`balance'" "{c RT}"
		if "`errorMessage'" != "" di as error _col(2) "`var': `errorMessage'"
		
		** Return matrix, scalars and pvalue
		if "`test'" == "LM(k)" {
			mat lms = (nullmat(stats), LM)
			return scalar lm`j' = LM
		}
		if "`test'" == "Q(p)" {
			mat qps = (nullmat(stats), QP)
			return scalar qp`j' = QP
		}
		
		mat ps = (nullmat(ps), pvalue)
		return scalar pvalue`j' = pvalue
		local j = `j' + 1
		
		drop `toUse2' `t' `id'
	}

	** Notes
	di as text "{hline 30}{c BT}{hline 23}{c BT}{hline 16}{c BT}{hline 14}{c BRC}"
	if "`test'" == "Q(p)" {
		di _col(2) "Notes: Under H0, Q(p) ~ chi2(p)"
		di as txt _col(5) "H0: No serial correlation up to order p."
		di as txt _col(5) "Ha: Some serial correlation up to order p."
	}
	if "`test'" == "LM(k)" {
		di _col(2) "Notes: Under H0, LM(k) ~ N(0,1)"
		di as txt _col(5) "H0: No serial correlation of order k."
		di as txt _col(5) "Ha: Some serial correlation of order k."
	}
	
	** Return
	capture confirm matrix qps
	if _rc == 0	return matrix QP = qps
	capture confirm matrix lms
	if _rc == 0	return matrix LM = lms
	capture confirm matrix ps
	if _rc == 0	return matrix p = ps
	
	restore
end

mata:
	void qp_statistic(string scalar varname, string scalar toUse, real scalar T, real scalar N, real scalar p) {
		real matrix UE, ME, Z, ZMeeMZ, ZMe, innersum_inverse; real scalar n, QP, pvalue, firstcol, lastcol, k
		
		// ue = fe + e (currently we just use e, as it also seems to work)
		// me = e
		UE = rowshape(st_data(., varname, toUse), N)'
		ME = UE :- mean(UE)
		
		// Z
		Z = J(T, p*N, .)
		for(n=1; n<=N; n++) {
			firstcol = 1 + (n-1)*p
			for(k=1; k<=p; k++) {
				Z[., firstcol+k-1] = (J(k, 1, 0)\ME[1..T-k, n]) + (T-k)/(T^2 - T)*UE[.,n]
			}
		}

		// Inner sum
		ZMeeMZ = J(p,p,0)
		ZMe = J(p,1, 0)

		for(n=1; n<=N; n++) {
			firstcol = 1 + (n-1)*p
			lastcol = firstcol + p - 1
			ZMeeMZ = ZMeeMZ + Z[., firstcol..lastcol]' * ME[., n] * ME[., n]' * Z[., firstcol..lastcol]
			ZMe = ZMe + Z[., firstcol..lastcol]' * ME[., n]
		}

		innersum_inverse = invsym(ZMeeMZ - 1/N*ZMe*ZMe')
		
		// Qp-tilde		
		QP = ZMe' * innersum_inverse * ZMe
		pvalue = 1-chi2(p, QP)

		// Store results
		st_numscalar("QP", QP)
		st_numscalar("pvalue", pvalue)
	}
	
	void qp_statistic_unbalanced(string scalar varname, string scalar toUse, real scalar N, real scalar p) {
		real matrix UE, ME, Zi, ZMeeMZ, ZMe, innersum_inverse; real scalar n, QP, pvalue, k; real vector Ti
		
		UE = rowshape(st_data(., varname, toUse), N)'
		Ti = colnonmissing(UE)
		
		ZMeeMZ = J(p,p,0)
		ZMe = J(p,1, 0)
		for(n=1; n<=N; n++) {
			UEi = select(UE[.,n], UE[.,n]:!=.)
			MEi = UEi :- mean(UEi)
			Zi = J(Ti[n], p, .)
			for(k=1; k<=p; k++) {
				Zi[., k] = (J(k, 1, 0)\MEi[1..Ti[n]-k, 1]) + (Ti[n]-k)/(Ti[n]^2 - Ti[n])*UEi
			}
			ZMeeMZ = ZMeeMZ + Zi' * MEi * MEi' * Zi
			ZMe = ZMe + Zi' * MEi
		}
		
		innersum_inverse = invsym(ZMeeMZ - 1/N*ZMe*ZMe')	

		// Qp-tilde		
		QP = ZMe' * innersum_inverse * ZMe
		pvalue = 1-chi2(p, QP)

		// Store results
		st_numscalar("QP", QP)
		st_numscalar("pvalue", pvalue)
	}
end

mata:
	void lm_statistic(string scalar varname, string scalar toUse, real scalar N, real scalar k) {
		UE = rowshape(st_data(., varname, toUse), N)'
		E = J(rows(UE), cols(UE), .)
		for(n=1; n<=N; n++) {
			E[.,n] = UE[.,n] :- mean(UE[.,n])
		}
		
		T = rows(E)
		Ti = colnonmissing(E)
		
		Z = colsum(E[k+1..T, .]:*E[1..T-k, .] + (E[1..T-k, .]:^2):/(Ti:-1))

		// LM-tilde		
		LM = sum(Z)/sqrt(sum(Z:^2) - 1/N*(sum(Z))^2)
		pvalue = 2*(1-normal(abs(LM)))

		// Store results
		st_numscalar("LM", LM)
		st_numscalar("pvalue", pvalue)
	}
end
